/*
 * Copyright @ 2020 Joshua Branson <jbranso@dismail.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or (at
 * your option) any later version.
 *
 * It is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * This program will attempt to decrypt a caesar cipher.  It uses a
 * very simple algorithm: It will try to find the most common letter
 * and assume that letter must be 'e'.  Note decrypt only works for
 * English.
 * https://inventwithpython.com/hacking/chapter7.html 
 * http://practicalcryptography.com/ciphers/caesar-cipher/ 
 * https://www.xarg.org/2010/05/cracking-a-caesar-cipher/
 * there's a 20 line algorithm here that works. */
/* https://en.wikipedia.org/wiki/Letter_frequency#Relative_frequencies_of_letters_in_the_English_language */
/* 
   a 8.167%     b 1.492%     c 2.782%     d 4.253%     e 12.702%     f 2.228%     g 2.015% 	
   h 6.094%     i 6.966%     j 0.153%     k 3.872%     l 4.025%      m 2.406%     n 6.749% 	
   o 7.507%     p 1.929%     q 0.095%     r 5.987%     s 6.327%      t 9.256%     u 2.758% 	
   v 0.978%     w 5.370%     x 0.150%     y 3.978%     z 0.074%

   Other available statistics: the relative frequencies of the first
   letter of a word, and the most common double letters:
   LL EE SS OO TT FF RR NN PP CC
                                                                    
   GNU decrypt is distributed in the hope that it will be useful,        
   but WITHOUT ANY WARRANTY; without even the implied warranty of      
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the       
   GNU General Public License for more details.                        

   This is still fairly easy to fool though:

   echo "This will fool decrypt." |./caesar | ./decrypt
*/

#include <ctype.h>   //tolower
#include "encrypt.h"

/* Number of input bytes sampled for the frequency analysis; also the
   size of the fileName and original_buffer arrays. */
#define BUFFER_SIZE 128
/* Number of entries letter_count needs so that a lower-case ASCII
   character code can be used directly as an index into it. */
#define ASCII_LENGTH BUFFER_SIZE 

/* Debugging aid: print one "letter: count" line for each of 'a'..'z',
   taken from letter_count.  The loop variable already is the letter's
   character code, so it is printed as-is.  Expands to a single
   brace-enclosed block, so it may be used with or without a trailing
   semicolon wherever a statement is allowed. */
#define PRINT_MAX_OCCURANCES                                    \
  {                                                             \
    extern int letter_count [];                                 \
    for (int i = 'a'; i <= 'z'; i++)                            \
      printf ("%c: %d\n", i, letter_count[i]);                  \
  }

/* Given the letter C ('a'..'z') that is assumed to be the encrypted
   form of 'e', evaluate to the forward shift (0..25) that turns 'e'
   into C.  C is parenthesised so that any expression may be passed,
   but it is evaluated more than once: do not pass an expression with
   side effects. */
#define ORIGINAL_SHIFT(c)  (((c) < 'e') ? (26 - ('e' - (c))) : abs ('e' - (c)))

/* File name given with the 'f' option; stays empty (all zero bytes)
   when that option is never seen. */
char fileName[BUFFER_SIZE];

/* The first BUFFER_SIZE bytes of the input: the sample on which the
   frequency analysis is done. */
char original_buffer[BUFFER_SIZE];

/*
 * Letter frequencies of the sample, indexed by lower-case character
 * code: letter_count['a'] is how many times 'a' or 'A' occurred,
 * letter_count['b'] how many times 'b' or 'B' occurred, and so on.
 */
int letter_count[ASCII_LENGTH];

/* Table of the command-line options this program accepts. */
static const struct argp_option options [] =
  {
   {
    .name = "file",
    .key = 'f',
    .arg = "FILE",
    .flags = 0,
    .doc = "Output the decrypted FILE."
   },
   /* An all-zero entry marks the end of the table. */
   { 0 }
  };

/*
 * Option callback that argp_parse invokes through the argp structure.
 *
 * opt:   key of the option being processed; only 'f' is handled.
 * arg:   the option's argument; for 'f' this is the file name, which
 *        is copied (terminating NUL included) into fileName.
 * state: argp's parser state, used here only to report errors.
 *
 * Returns 0 when the option was handled, ARGP_ERR_UNKNOWN for any key
 * this parser does not recognise, and EINVAL when the file name does
 * not fit into fileName.
 */
error_t argp_parser (int opt, char *arg, struct argp_state *state)
{
  extern char fileName [];
  switch (opt)
    {
    case 'f':
      {
        size_t length = strlen (arg);
        /* fileName holds BUFFER_SIZE bytes, terminator included. */
        if (length >= (size_t) BUFFER_SIZE)
          {
            /* argp_error normally terminates the program; the return
               below is only reached when argp was told not to exit. */
            argp_error (state, "file name is longer than %d characters",
                        BUFFER_SIZE - 1);
            return EINVAL;
          }
        /* length + 1 so the terminating NUL is copied as well. */
        memcpy (fileName, arg, length + 1);
        break;
      }
    default:
      /* Not one of our options: let argp deal with it. */
      return ARGP_ERR_UNKNOWN;
    }
  return 0;
}

/* The parser description passed to argp_parse: the option table, the
   callback that handles each option, no args_doc text, and a one-line
   description of the program. */
struct argp argp =
  {
   .options = options,
   .parser = argp_parser,
   .args_doc = 0,
   .doc = "A simple program to decrypt a caesar cipher."
  };

/* 
   Read up to BUFFER_SIZE bytes from STREAM into original_buffer, so
   that the most used letter of that sample can be determined before
   anything is decrypted.  (The sample has to be kept in memory,
   because one cannot fseek on stdin.)  Every letter read is also
   counted, case-insensitively, in letter_count.

   Returns 1 if the sample filled the buffer and STREAM still has
   unread input; the caller then has to keep processing STREAM, which
   is left positioned at the first byte after the sample.  Returns 0 if
   STREAM ended within the first BUFFER_SIZE bytes, in which case
   original_buffer holds the whole input.

   original_buffer is NUL-terminated only when fewer than BUFFER_SIZE
   bytes were read: a completely filled buffer has no room left for a
   terminator.
*/

int store_input_in_buffer_and_letter_count (FILE * stream)
{
  extern char original_buffer [];
  extern int letter_count [];
  /* int rather than char: getc returns an int so that EOF can be told
     apart from every valid byte value. */
  int c = EOF;
  int i;
  for (i = 0; (i < BUFFER_SIZE) && ((c = getc (stream)) != EOF); i++)
    {
      original_buffer[i] = (char) c;
      /* Only indexes 'a'..'z' are ever used, which keeps the access
         inside letter_count whatever the locale says is a letter. */
      int lower = tolower (c);
      if (lower >= 'a' && lower <= 'z')
        letter_count[lower] += 1;
    }

  if (i < BUFFER_SIZE)
    {
      /* The stream ended inside the sample. */
      original_buffer[i] = '\0';
      return 0;
    }

  /* The buffer is full.  Peek at the next byte to find out whether
     there is more input, and put it back if there is. */
  c = getc (stream);
  if (c == EOF)
    return 0;
  ungetc (c, stream);
  return 1;
}

/*
 * Return the "corrective" shift of the caesar cipher: 26 minus the
 * shift that is believed to have been used for encrypting.
 *
 * The most used letter in letter_count is assumed to stand for 'e';
 * when several letters share the highest count, the first one in
 * alphabetical order wins.  For example, if 'h' is the most used
 * letter, the encrypting shift was 3 and 23 is returned.
 *
 * If no letter was counted at all, 'e' itself is assumed, which gives
 * a result of 26 (a full rotation).
 */
int discover_shift ()
{
  extern int letter_count [];
  /* Both start from a defined value: the highest count seen so far is
     0, and the fallback letter means "nothing was shifted". */
  int max = 0;
  int most_used_char = 'e';
  for (int i = 'a'; i <= 'z'; i++)
    {
      if (letter_count[i] > max)
        {
          max = letter_count[i];
          most_used_char = i;
        }
    }
  return abs (ORIGINAL_SHIFT (most_used_char) - 26);
}

/*
 * Decrypt STREAM: sample its beginning into original_buffer, derive
 * the corrective shift from the letter counts of that sample, then
 * hand first the sample and then the rest of STREAM to encrypt ()
 * together with that shift.
 */
void decrypt (FILE * stream)
{
  extern char original_buffer [];
  int keep_decrypting = store_input_in_buffer_and_letter_count (stream);
  int shift = discover_shift ();
  size_t sample_length = BUFFER_SIZE;

  /* When the stream ended inside the sample, the buffer is
     NUL-terminated and everything from the terminator on is padding,
     not input.  The search is bounded because a completely filled
     buffer carries no terminator. */
  if (!keep_decrypting)
    {
      const char *terminator = memchr (original_buffer, '\0', BUFFER_SIZE);
      if (terminator != NULL)
        sample_length = (size_t) (terminator - original_buffer);
    }

  /* Glibc lets you create streams from regions of memory.  That is
     useful, because original_buffer is such a region.  An empty
     sample is skipped: fmemopen may reject a size of zero. */
  if (sample_length > 0)
    {
      FILE * stream_memory_buffer
        = fmemopen (original_buffer, sample_length, "r");
      if (stream_memory_buffer == NULL)
        {
          perror ("fmemopen");
          return;
        }
      encrypt (shift, stream_memory_buffer);
      /* NOTE(review): main closes its own stream after decrypt
         returns, so encrypt is assumed not to close the stream it is
         given -- confirm against encrypt.h. */
      fclose (stream_memory_buffer);
    }

  if (keep_decrypting)
    encrypt (shift, stream);
}

/*
 * Parse the command line, obtain the input stream and decrypt it.
 * Returns 0 on success and 1 when no input stream could be obtained.
 */
int main (int argc, char **argv)
{
  argp_parse (&argp, argc, argv, 0, 0, 0);
  /* maybe_open_file is defined elsewhere; presumably it opens the
     file named by fileName or falls back to stdin -- confirm. */
  FILE * stream = maybe_open_file ();
  if (stream == NULL)
    {
      fputs ("decrypt: unable to open the input\n", stderr);
      return 1;
    }
  decrypt (stream);
  fclose (stream);
  return 0;
}

